* D:\E\replications\BJPS2020\rawdata\echp\echp_yale.do

* cd D:\E\replications\BJPS2020/
clear all
cap log close
log using ./rawdata/echp/echp_yale_data.txt, text replace
display "$S_TIME  $S_DATE"
about
ado dir

// Data-set build: 
	/*
	// This was done by Stuart at Yale
	* cd Z:\esi-684001-isps\shared\eurostat\ECHP
	* cd C:\Users\Stuart\Documents\eurostat/ECHP

	clear all

	foreach v in h p r rel {
		forvalues j=1/8 {
			clear
			insheet using a_w`j'`v'.csv, comma clear
			gen wwave=`j'
			gen ffile="`v'"
			gen llocation="a_w`j'`v'"
			*tempfile a_w`j'`v'
			compress
			*save `a_w`j'`v''
			saveold `v'_`j'.dta
		}
	}
	clear
	cap noisily log using append.txt, replace
	foreach v in h p r rel {
		clear
		forvalues j=1/8 {
			*append using `a_w`j'`v''
			append using `v'_`j'.dta
		}
		compress
		saveold `v'.dta
	}

	* zip files h.dta p.dta r.dta rel.dta into echp.zip
	zipfile h.dta p.dta r.dta rel.dta, saving(echp)
	cap log close
	
	*/
	* Person-level files
	use ./rawdata/echp/licensed/p.dta, clear
	isid country wave hid pid
	* HH-level files
	merge m:1 country wave hid using ./rawdata/echp/licensed/h.dta
	tab _merge
	compress
	drop if pid==.
	drop _merge

	gen ccode=.
	replace ccode=	2550	if country==	1 // 1 Germany (original ECHP survey)
	replace ccode=	255	if country==	51 // 51 Germany (ECHP based on national survey-SOEP)
	replace ccode=	390	if country==	2 // 2 Denmark
	replace ccode=	210	if country==	3 // 3 The Netherlands	
	replace ccode=	211	if country==	4 // 4 Belgium
	replace ccode=	212	if country==	55 // 55 Luxembourg (ECHP based on national survey-PSELL)
	replace ccode=	2120	if country==	5 // 5 Luxembourg (original ECHP survey)
	replace ccode=	220	if country==	6 // 6 France
	replace ccode=	2000	if country==	7 // 7 United-Kingdom (original ECHP survey)
	replace ccode=	200	if country==	57 // 57 United-Kingdom (ECHP based on national survey-BHPS)
	replace ccode=	205	if country==	8 //   8 Ireland
	replace ccode=	325	if country==	9 //   9 Italy
	replace ccode=	350	if country==	10 //   10 Greece
	replace ccode=	230	if country==	11 //   11 Spain
	replace ccode=	235	if country==	12 //   12 Portugal
	replace ccode=	305	if country==	13 //   13 Austria
	replace ccode=	375	if country==	14 //   14 Finland
	replace ccode=	380	if country==	15 //   15 Sweden

	label define ccode 255 "Germany (ECHP-GSOEP)" 390 "Denmark (ECHP)" 210 "Netherlands (ECHP)" 211 "Belgium (ECHP)" ///
		212 "Luxembourg (ECHP-PSELL)" 220 "France (ECHP)" 200 "UK (ECHP-BHPS)" 205 "Ireland (ECHP)" ///
		325 "Italy (ECHP)" 350 "Greece (ECHP)" 230 "Spain (ECHP)" 235 "Portugal (ECHP)" 305 "Austria (ECHP)" ///
		375 "Finland (ECHP)" 380 "Sweden (ECHP)" 2550 "Germany (ECHP)" 2120 "Luxembourg (ECHP)" 2000 "UK (ECHP)", modify 
	label values ccode ccode

	cap noisily do echp_labels.do

	cap drop year
	gen year =.
	replace year=	1994	if wave==	1
	replace year=	1995	if wave==	2
	replace year=	1996	if wave==	3
	replace year=	1997	if wave==	4
	replace year=	1998	if wave==	5
	replace year=	1999	if wave==	6
	replace year=	2000	if wave==	7
	replace year=	2001	if wave==	8

	isid ccode year pid

	clonevar original_pid=pid
	clonevar original_hid=hid

	replace pid=_n if ccode==380 // assigning unique ids for Sweden, which is not panel
	egen id = group(ccode pid) // this is a unique person-id, unique across countries
	label var id "Truly unique person ID (ccode, pid)"

	bys id: gen obs=_N
	label var obs "# of years P is in panel" 

	compress
	xtset id year, yearly

	order ccode year id pid hid country wave
	compress

	* missing values
	mvdecode hi020 hi100 ///
		hi110 hi111 hi112 hi120 hi121 hi123 ///
		hi130 hi131 hi132 hi133 hi134 hi135 hi136 hi137 hi138 hi140 ///
		, mv(-8=.a \ -9=.b)
	label define income_mv .a "not applicable" .b "missing", modify

	clonevar educ=pt022 if inrange(pt022,1,3)
	label define educ ///
		1 "Recognised third level education (ISCED 5-7)" ///
		2 "Second stage of secondary level education (ISCED 3)" ///
		3 "Less than second stage of secondary education (ISCED 0-2)", modify
	label val educ educ
	cap drop isced3
	vreverse educ, gen(isced3)

	* age
	gen age=year-pd001
	label var age "Age of person [year-pd001]"

	* gender
	gen female=pd004-1 if pd004>0
	label var female "Dummy for female [pd004]"

	* CPI
	replace year=year-1
	cap drop _merge
	merge m:1 ccode year using ./rawdata/echp/cpi_eurostat.dta
		// in 2000
	drop if _merge==2
	drop _merge

	cap drop dataset
	gen dataset="ECHP"
	
	note: ./rawdata/echp/echp_yale.do
	note: ./rawdata/echp/licensed/echp_h-and-p.dta
	note: Created on `= c(current_date)'
	saveold ./rawdata/echp/licensed/echp_h-and-p.dta, replace

display "$S_TIME  $S_DATE"
cap log close


****************
* Calculate ESIs
****************

cap log close
log using ./rawdata/echp/echp_yale_ESIs.txt, text replace
display "$S_TIME  $S_DATE"
about
ado dir

clear
use if inlist(ccode,200,205,210,211,212,220,230,235,255,305,325,350,375,390) ///
	using ./rawdata/echp/licensed/echp_h-and-p.dta, clear

* weight
cap drop weight
gen weight=pg002
label var weight "Individua level weight [pg002]"

* tax-rate:
gen t=1-hi020 if hi020>=0 & hi020<.
label var t "HH tax rate (1-hi020)"

* market income
cap drop mi_n mi
egen mi_n=rowtotal(hi110 hi120) 
* hi110=hi111 + hi112
* hi120=hi121 + hi122 + hi123
replace mi_n=mi_n*hi020 if (ccode==220 | ccode==375) // FRA and FI are gross
gen mi=mi_n/hi020 if hi020>=0 // Lux is missing
label var mi "HH market income"

* disposable income
egen di=rowtotal(mi_n hi130)
replace di=mi_n+(hi130*hi020) if (ccode==220 | ccode==375) // transfers in gross in FRA and FIN
* hi140 is gross in FRA, net in FIN
replace di=. if mi_n==.
label var di "HH disposable income"

compare di hi100 if hi140==0

* equivalence scales
cap drop famsize
gen famsize=hd001
label var famsize "HH size [hd001, HHsize_psid]"
clonevar HHsize=famsize 

cap drop equivalence
gen equivalence=sqrt(HHsize)

* equivalize
foreach v in mi di hi100 {
	cap drop `v'_e
	gen `v'_e=`v'/equivalence if `v'>0
}

sum mi_e di_e hi100_e

* top- and bottom-coding, then CPI
set more off
foreach v in mi_e di_e hi100_e {
	bys ccode year: egen p1_`v'=wpctile(`v'), p(1) weights(weight)
	bys ccode year: egen p99_`v'=wpctile(`v'), p(99) weights(weight)
	replace `v'=p1_`v' if `v'<p1_`v' // bottom-code at p1
	replace `v'=p99_`v' if `v'>p99_`v' & `v'<. // top-code at p99
	replace `v'=`v'/(cpi/100) // express in 2000 NCU
}

sum mi_e di_e hi100_e

*foreach v in mi_e di_e hi100_e {
*	egen gini_`v' = inequal(`v'), by(ccode year) weight(weight) index(gini)
*}

sum mi_e di_e hi100_e
egen group=group(ccode year), label	
cap drop ESI*
xtset id year, yearly
foreach v in mi_e di_e hi100_e {
	xtset id year, yearly
	egen gini_`v'_2560 = inequal(`v') if age>=25 & age<=60, by(group) weight(weight) index(gini)
	egen gini_`v'_all = inequal(`v'), by(group) weight(weight) index(gini)	
	cap drop d1_`v' d`v'
	gen d1_`v'=d1.`v'/abs(l1.`v') if `v'<. & l1.`v'<.
	label var d1_`v' "Growth in `v'"
	gen d`v'=d1.`v'/(abs(l1.`v'+`v')/2) if `v'<. & l1.`v'<. // arc changes
	label var d`v' "Arc change of `v'"
	gen ESI25_`v'=(d`v'<=-0.25) if d`v'<.
	gen ESI50_`v'=(d`v'<=-0.5) if d`v'<.
	gen ESI10_`v'=(d`v'<=-0.1) if d`v'<.
	gen ESI00_`v'=(d`v'<0) if d`v'<.
	gen upESI25_`v'=(d`v'>=0.25) if d`v'<.
	gen upESI50_`v'=(d`v'>=0.5) if d`v'<.
	gen upESI10_`v'=(d`v'>=0.1) if d`v'<.
	gen upESI00_`v'=(d`v'>0) if d`v'<.

	gen gESI25_`v' =(d1_`v'<=-0.25) if d1_`v'<.
	gen gESI50_`v' =(d1_`v'<=-0.5) if d1_`v'<.
	gen gESI10_`v' =(d1_`v'<=-0.1) if d1_`v'<.
	gen gESI00_`v' =(d1_`v'<0) if d1_`v'<.
	gen upgESI25_`v'=(d1_`v'>=0.25) if d1_`v'<.
	gen upgESI50_`v'=(d1_`v'>=0.5)  if d1_`v'<.
	gen upgESI10_`v'=(d1_`v'>=0.1)  if d1_`v'<.
	gen upgESI00_`v'=(d1_`v'>0)  if d1_`v'<.
	
	if "`v'"=="hi100_e"  local foo "Total net HH income (ECHP)"
	else if "`v'"=="mi_e" local foo "Market HH income"
	else if "`v'"=="di_e" local foo "Disposable HH income"
	label var gini_`v'_2560 "Gini if `foo', ages 25-60"
	label var gini_`v'_all "Gini if `foo', ages all"
	foreach j in 00 10 25 50 {
		label var ESI`j'_`v' "`j'+% arc drop, `foo'"
		label var upESI`j'_`v' "`j'+% arc gain, `foo'"
		label var gESI`j'_`v' "`j'+% drop, `foo'"
		label var upgESI`j'_`v' "`j'+% gain, `foo'"
	}
}

tabstat gini_* ESI25_* [aw=weight] , by(ccode) f(%4.3f)
tabstat gini_* ESI25_* [aw=weight] if age>=25 & age<=60, by(ccode) f(%4.3f)

// Joint incidence of MI + DI drops
foreach j in 00 10 25 50 {
	egen compESI`j'=group(ESI`j'_mi_e ESI`j'_di_e), label
	egen compgESI`j'=group(gESI`j'_mi_e gESI`j'_di_e), label	
	label var compESI`j' "group(ESI`j'_mi_e ESI`j'_di_e)"
	label var compgESI`j' "group(gESI`j'_mi_e gESI`j'_di_e)"
}
label list compESI25

foreach j in 00 10 25 50 {
	foreach m in 1 2 3 4 {
		gen RR`j'_`m'= (compESI`j'==`m') if compESI`j'<.
		gen RR`j'_g`m'=(compgESI`j'==`m') if compgESI`j'<.
	}
	label var RR`j'_1  "MI+DI drops (ESI`j'_mi_e=0 + ESI`j'_di_e==0)"
	label var RR`j'_2  "MI+DI drops (ESI`j'_mi_e=0 + ESI`j'_di_e==1)"
	label var RR`j'_3  "MI+DI drops (ESI`j'_mi_e=1 + ESI`j'_di_e==0)"
	label var RR`j'_4  "MI+DI drops (ESI`j'_mi_e=1 + ESI`j'_di_e==1)"
	label var RR`j'_g1 "MI+DI drops (gESI`j'_mi_e=0 + gESI`j'_di_e==0)"
	label var RR`j'_g2 "MI+DI drops (gESI`j'_mi_e=0 + gESI`j'_di_e==1)"
	label var RR`j'_g3 "MI+DI drops (gESI`j'_mi_e=1 + gESI`j'_di_e==0)"
	label var RR`j'_g4 "MI+DI drops (gESI`j'_mi_e=1 + gESI`j'_di_e==1)"
}

foreach v in mi_e di_e hi100_e {
	clonevar my_`v'=`v'
	clonevar sd_`v'=`v'
	label var my_`v' "`: var label `v'' (mean)"
	label var sd_`v' "`: var label `v'' (SD)"
}

preserve

	foreach j in 00 10 25 50 {
		foreach v in mi_e di_e hi100_e {
			gen d`j'_`v'=d`v' if ESI`j'_`v'==1
			label var d`j'_`v' "Median drop size if ESI`j'_`v'==1"
			gen d1_`j'_`v'=d`v' if gESI`j'_`v'==1
			label var d1_`j'_`v' "Median drop size if gESI`j'_`v'==1"
		}
	}

	foreach v of varlist _all {
		local l`v': var label `v'
	}

	collapse (sd) sd_* (mean) my_* gin*_* *ESI??_* RR??_* (median) d??_*e d1_*e [aw=weight], by(ccode year)
	foreach v of varlist _all {
		label var `v' "`l`v''"
	}
	gen dataset="ECHP"
	isid ccode year
	compress
	gen ages="all"
	compress
	note: .\rawdata\echp\echp_yale.do
	note: Created on `= c(current_date)'
	save .\rawdata\echp\ESIs_echp.dta, replace
restore

preserve
	keep if age>=25 & age<=60
	foreach j in 00 10 25 50 {
		foreach v in mi_e di_e hi100_e {
			gen d`j'_`v'=d`v' if ESI`j'_`v'==1
			label var d`j'_`v' "Median drop size if ESI`j'_`v'==1"
			gen d1_`j'_`v'=d`v' if gESI`j'_`v'==1
			label var d1_`j'_`v' "Median drop size if gESI`j'_`v'==1"
		}
	}

	foreach v of varlist _all {
		local l`v': var label `v'
	}
	collapse (sd) sd_* (mean) my_* gin*_* *ESI??_* RR??_* (median) d??_*e d1_*e [aw=weight], by(ccode year)
	foreach v of varlist _all {
		label var `v' "`l`v''"
	}
	gen dataset="ECHP"
	isid ccode year
	compress
	gen ages="25-60"
	compress
	note: .\rawdata\echp\echp_yale.do
	note: Created on `= c(current_date)'
	save .\rawdata\echp\ESIs_ages25-60_echp.dta, replace
restore


display "$S_TIME  $S_DATE"
cap log close
